Dot Pattern Variability
  • Prior Work
    • 2022_Paper
    • Hu_Nosofsky24
  • Dot Patterns
  • Dot Similarity
  • Task

On this page

  • Correlations with Accuracy in Hu & Nosofsky 2024
    • Relationship between catLearn accuracy and binnings of similarity rating
    • Correlations between similarity ratings and CatLearn accuracy
  • Data Inspection & Sanity Checks
    • Prototype set counts
    • Rating Distributions
    • Reaction Time Distributions
    • Individual Subject Ratings
  • Lowest and Highest Rated Pairs
  • Pattern Pair Table
  • View source
  • Edit this page
  • Report an issue

Dot Pattern Similarity

  • Show All Code
  • Hide All Code

  • View Source
Published

April 2, 2024

  • catLearn accuracy refers to accuracy on the categorization task in the Hu & Nosofsky 2024 study.
  • prototype sets refer to sets of 3 prototypes that correspond to a single participant in the Hu & Nosofsky 2024 study.
  • prototype pairs refer to the specific pairs of prototypes that are displayed together on a rating trial (3 pairs per set).
Code
# Load (installing if necessary) every package used in this document.
pacman::p_load(dplyr,purrr,tidyr,ggplot2, here, patchwork, 
  conflicted, jsonlite,stringr, gt, knitr, kableExtra, 
  lubridate,ggh4x, lmerTest)
# Resolve function-name conflicts in favor of dplyr and lmerTest.
walk(c("dplyr", "lmerTest"), conflict_prefer_all, quiet = TRUE)
options(digits=2, scipen=999, dplyr.summarise.inform=FALSE)
# Source shared helpers from R/fun_plot.R (presumably defines stat_bar and
# plot_dotsAll used below -- confirm against that file).
walk(c("fun_plot"), ~ source(here::here(paste0("R/", .x, ".R"))))
# Prototype patterns; `set` identifies a 3-prototype set as "<sbjCode>_<condit>".
mc24_proto <- read.csv(here("Stimulii","mc24_prototypes.csv")) |> mutate(set=paste0(sbjCode,"_",condit)) 
# Per-subject categorization results from the Hu & Nosofsky 2024 study.
sbj_cat <- read.csv(here("data","mc24_sbj_cat.csv"))

# All similarity-rating data files (one CSV per participant session).
dfiles <- list(path=list.files(here::here("data/dotSim_data"),full.names=TRUE))

# Read and combine every data file once, tagging rows with their source file.
# (A redundant first `map_dfr(..., read.csv)` pass, whose result was
# immediately overwritten, has been removed -- it read every file twice.)
d <- map_dfr(dfiles$path, ~{read.csv(.x) |> 
    mutate(sfile=tools::file_path_sans_ext(basename(.x)))}) |> 
  select(-trial_index, -internal_node_id,-trial_type) |>
  # Derive the prototype-set id ("<sbjCode>_<condit>") from the first item label.
   mutate(set = paste(str_extract(item_label_1, "^\\d+"),
                     str_extract(item_label_1, "[a-z]+"), sep = "_")) |>
  mutate(pair_label = paste0(item_label_1,"_",item_label_2)) |>
  relocate(sbjCode,date,set,pair_label,trial,item_label_1,item_label_2,response,rt)

# Per-set summaries: number of distinct raters plus mean/sd of responses.
setCounts <- d |> 
  pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
  group_by(set) |> summarise(n=n_distinct(sbjCode),resp=mean(response),sd=sd(response)) |> arrange(desc(n))

# length(unique(mc_proto$set)) # 304
# One row per prototype set with its rating count (0 when never rated).
# Fix: `.groups="drop"` is not a mutate() argument -- the original silently
# created a literal ".groups" column; grouping is dropped via ungroup() instead.
setCounts2 <- mc24_proto |> group_by(set) |> 
  slice_head(n=1) |> 
  select(id,file,set) |> 
  left_join(setCounts,by="set") |> 
  mutate(n = ifelse(is.na(n), 0, n)) |> 
  arrange(n) |> ungroup()

# Per-pair summaries: rating count and mean/sd response (3 pairs per set).
pairCounts <- d |> 
  group_by(pair_label,set) |> 
  summarise(n=n(),mean_resp=mean(response),sd=sd(response)) |> arrange(desc(n)) |> ungroup()



# Average rating each individual pattern received across all pairs it appeared in.
patternAvg <- d |> 
  pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
  group_by(item,file) |> 
  summarise(n_rating=n(),resp=mean(response),sd=sd(response)) |> 
  arrange(desc(n_rating))

# Merge similarity ratings onto per-subject categorization accuracy, then bin
# ratings into extreme similarity groups (cutoffs: <3.5 dissimilar, >6.0 similar).
cat_sim <- sbj_cat |> 
  mutate(item=item_label) |> 
  left_join(patternAvg,by=c("file","item"))  |> arrange(desc(n_rating)) |>
  #remove rows where n_rating is NA, or less than 12
  filter(!is.na(n_rating),n_rating>=12) |> 
  mutate(sim_group = ifelse(resp>6.0,"Very Similar",ifelse(resp<3.5,"Very Dissimilar","Medium"))) |> 
  mutate(sim_group=factor(sim_group,levels=c("Very Dissimilar","Medium","Very Similar"))) 



# Test-phase (Phase 2) subset used for the accuracy analyses below.
cat_sim_test <- cat_sim |> 
  filter(Phase==2) 


#cor(cat_sim$resp,cat_sim$Corr)

#  m1 <- lmer(Corr ~ resp + (1|sbjCode), data=cat_sim)
#  summary(m1)

#  m1 <- lmer(Corr ~ resp + (1|Pattern.Type) +  (1|sbjCode), data=cat_sim)
#  summary(m1)

#  m1 <- lmer(Corr ~ resp*Pattern.Type*condit +  (1|sbjCode), data=cat_sim)
#  summary(m1)


# m1 <- lmer(Corr ~ sim_group +  (1|sbjCode), data=cat_sim)
# summary(m1)

# m1 <- lmer(Corr ~ sim_group*condit +  (1|sbjCode), data=cat_sim)
# summary(m1)

# m1 <- lmer(Corr ~ sim_group*condit*Pattern.Type +  (1|sbjCode), data=cat_sim)
# summary(m1)

Correlations with Accuracy in Hu & Nosofsky 2024

Assess # of patterns in various binnings - e.g. quartile, decile

Code
# Helper: mean/sd similarity rating and number of distinct stimulus files
# within each level of the supplied binning column.
bin_summary <- function(df, bin_col) {
  df |>
    group_by({{ bin_col }}) |>
    summarize("Avg. Similarity Rating" = mean(resp),
              sd = sd(resp),
              n_ratings = n_distinct(file),
              .groups = "drop")
}

# Quartile, decile, and extreme-group summaries of the similarity ratings.
t1 <- cat_sim |> mutate(Quartile = ntile(resp, 4)) |> bin_summary(Quartile)
t2 <- cat_sim |> mutate(Decile = ntile(resp, 10)) |> bin_summary(Decile)
t3 <- cat_sim |> bin_summary(sim_group)

t1 |> kbl(caption="Quartiles")
Quartiles
Quartile Avg. Similarity Rating sd n_ratings
1 3.7 0.56 133
2 4.6 0.18 158
3 5.2 0.15 170
4 5.9 0.37 120
Code
t2 |> kbl(caption="Deciles")
Deciles
Decile Avg. Similarity Rating sd n_ratings
1 3.1 0.47 65
2 4.0 0.13 72
3 4.3 0.08 77
4 4.6 0.07 84
5 4.8 0.07 86
6 5.0 0.06 80
7 5.2 0.06 82
8 5.5 0.08 79
9 5.8 0.09 78
10 6.3 0.33 58
Code
t3 |> kbl(caption="Extreme Groups")
Extreme Groups
sim_group Avg. Similarity Rating sd n_ratings
Very Dissimilar 3.0 0.44 50
Medium 4.9 0.62 289
Very Similar 6.4 0.32 45

Relationship between catLearn accuracy and binnings of similarity rating

  • generally a negative correlation - higher similarity rating -> lower accuracy
  • quartiles and deciles are roughly evenly grouped bins, while the extreme similarity grouping has the bulk of the ratings in the medium group

Quartiles and Deciles

Code
# Test-phase accuracy by similarity-rating quartile/decile, faceted by
# pattern type and by training condition.
p3 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_wrap(~Pattern.Type) + 
  labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Pattern Type",fill="Similarity Rating Quartile")


p4 <- cat_sim_test |> 
  mutate(Decile = as.factor(ntile(resp, 10))) |> 
  ggplot(aes(x=Decile,y=Corr,fill=Decile)) + 
  stat_bar + 
  facet_wrap(~Pattern.Type) + 
  labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Pattern Type", fill="Similarity Rating Decile")

p5 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Training Condition", fill="Similarity Rating Quartile")


p6 <- cat_sim_test |> 
  mutate(Decile = as.factor(ntile(resp, 10))) |> 
  ggplot(aes(x=Decile,y=Corr,fill=Decile)) + 
  stat_bar + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Training Condition", fill="Similarity Rating Decile")


# Combined faceting; x-axis label fixed: these are quartiles, not quintiles.
p7 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_nested_wrap(~condit+Pattern.Type) + labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Training Condition and Pattern Type")

p3 + p4
p5 + p6
p7
Figure 1: 2024 CatLearn accuracy by different similarity rating groups
Figure 2: 2024 CatLearn accuracy by different similarity rating groups
Figure 3: 2024 CatLearn accuracy by different similarity rating groups

Extreme similarity rating groups

Code
# Test-phase accuracy for the extreme similarity-rating groups, broken out by
# pattern type, training condition, and their combination.
p9 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=Pattern.Type, fill=sim_group)) + 
  stat_bar + labs(title="Group by Pattern Type",y="CatLearn Accuracy", fill="Similarity Rating Group")

p10 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=condit, fill=sim_group)) + 
  stat_bar + labs(title="Group by Condit",y="CatLearn Accuracy", fill="Similarity Rating Group")

p11 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=sim_group, fill=sim_group)) + 
  stat_bar + 
  facet_nested_wrap(~condit+Pattern.Type) +
  labs(title="Separate by Condit and Pattern Type",y="CatLearn Accuracy", fill="Similarity Rating Group") +
  theme(axis.text.x = element_text(size=5,angle = 45, hjust = 0.5, vjust = 0.5))


# Stack the three panels; the fully faceted panel gets double height.
 (p9 / p10)/p11 + plot_annotation(title="CatLearn accuracy by extreme similarity rating groups") +
  plot_layout(heights = c(1,1,2))
Figure 4: 2024 CatLearn accuracy by different similarity rating groups
  • learning curves
  • Individual Learning Curves
Code
# trSim1 <- cat_sim |> filter(Phase==1) |> 
#   group_by(condit,Block) |> 
#   mutate(Quartile = as.factor(ntile(resp, 4))) |> 
#   ggplot(aes(x=Block,y=Corr, fill=Quartile)) + stat_bar+  facet_wrap(~condit)  +
#    labs(x="Training Block", y="CatLearn Accuracy",fill="Similarity Rating Quartile")


# trSim2 <- cat_sim |> filter(Phase==1) |> 
#   group_by(condit,Block) |> 
#   ggplot(aes(x=Block,y=Corr, fill=sim_group)) +
#    stat_bar+  
#    facet_wrap(~condit) +
#   labs(x="Training Block", y="CatLearn Accuracy",fill="Similarity Rating Group")




# Training-phase (Phase 1) accuracy across blocks, colored by similarity bin.
# Fix: the mapped aesthetic is `col`, so the legend title must be set with
# labs(col=...) -- the original labs(fill=...) had no effect (no fill mapping).
trSim1 <- cat_sim |> filter(Phase==1) |> 
  group_by(condit,Block) |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Block,y=Corr, col=Quartile)) + 
    stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap(~condit)  +
   labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Quartile",title = "Training Accuracy by Block and Similarity Quartile")

# Same curves using the extreme similarity grouping (title de-garbled).
trSim2 <- cat_sim |> filter(Phase==1) |> 
  group_by(condit,Block) |> 
  ggplot(aes(x=Block,y=Corr, col=sim_group)) +
      stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap(~condit) +
  labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Group", title="Training Accuracy by Block and Extreme Similarity Grouping")


trSim1/trSim2
Figure 5: Training accuracy by block and similarity rating
Code
 # Per-subject training curves for the extreme (non-Medium) similarity groups.
 # Fix: legend title set via labs(col=...) to match the mapped colour aesthetic
 # (labs(fill=...) was a no-op); garbled title cleaned up.
 trIndv <- cat_sim |> filter(Phase==1, !(sim_group=="Medium")) |> 
  group_by(condit,Block) |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Block,y=Corr, col=sim_group)) + 
      stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap2(~condit+sim_group+sbjCode) +
  labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Group", title="Training Accuracy by Block and Extreme Similarity Grouping")

trIndv 

#cat_sim |> filter(Phase==1) |> group_by(id) |> summarize(n_sim = n_distinct(sim_group)) |> arrange(desc(n_sim)) |> head(10)

# cat_sim |> filter(Phase==1) |> group_by(id) |> mutate(n_sim = n_distinct(sim_group)) |> filter(n_sim>1)  |> group_by(condit,Block) |> 
#   ggplot(aes(x=Block,y=Corr, col=sim_group)) + 
#       stat_summary(shape=0,geom="point",fun="mean")+
#   stat_summary(geom="line",fun="mean")+ 
#    facet_wrap2(~condit+sbjCode)
Figure 6: Training accuracy by block and similarity rating

Correlations between similarity ratings and CatLearn accuracy

Code
# Continuous relationship between similarity rating and test-phase accuracy,
# with per-group linear fits. Note: p3/p4 reuse (and shadow) the plot names
# from the earlier quartile/decile chunk.
p1 <- cat_sim_test |> ggplot(aes(x=resp,y=Corr,col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating")

p2 <- cat_sim_test |> ggplot(aes(x=resp,y=Corr, col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating")


# Same relationship with ratings discretized into deciles.
p3 <- cat_sim_test |> 
mutate(Decile = ntile(resp, 10)) |> 
ggplot(aes(x=Decile,y=Corr, col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating Decile")


p4 <- cat_sim_test |> 
mutate(Decile = ntile(resp, 10)) |> 
ggplot(aes(x=Decile,y=Corr, col=condit,fill=condit)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating Decile", fill="Training Condition", col="Training Condition")


(p1+p2) / (p3+p4)
Figure 7: Correlations between similarity ratings and CatLearn accuracy

Structure of dataframe after merging similarity ratings with CatLearn accuracy

  • Each subject in the 2024 study has a similarity score for each of their 3 categories (averaged over 2 comparisons with that category's prototype).
  • The same category similarity scores are then compared to their accuracy for each of the 5 Pattern Types (old, prototype, new low, new med, new high).
  • 5 Pattern types * 3 Categories = 15 comparisons per subject
Table 1: Example of how data is structured after combining similarity ratings with CatLearn accuracy. 5 Pattern types * 3 Categories = 15 rows per subject in the 2024 study
sbjCode condit Category Pattern.Type n CatLearn Accuracy n_rating Category Similarity sd item
316 high 1 prototype 1 1 60 5.1 2.6 316_high_1_285
316 high 1 old 9 0.4 60 5.1 2.6 316_high_1_285
316 high 1 new_low 3 0.3 60 5.1 2.6 316_high_1_285
316 high 1 new_med 6 0.3 60 5.1 2.6 316_high_1_285
316 high 1 new_high 9 0.4 60 5.1 2.6 316_high_1_285
316 high 2 prototype 1 0 60 3.8 2 316_high_2_327
316 high 2 old 9 0.7 60 3.8 2 316_high_2_327
316 high 2 new_low 3 0.7 60 3.8 2 316_high_2_327
316 high 2 new_med 6 0.2 60 3.8 2 316_high_2_327
316 high 2 new_high 9 0.2 60 3.8 2 316_high_2_327
316 high 3 prototype 1 0 60 5.2 2.4 316_high_3_287
316 high 3 old 9 0.1 60 5.2 2.4 316_high_3_287
316 high 3 new_low 3 0.7 60 5.2 2.4 316_high_3_287
316 high 3 new_med 6 0 60 5.2 2.4 316_high_3_287
316 high 3 new_high 9 0.3 60 5.2 2.4 316_high_3_287
Code
# Interactive table of the merged test-phase data.
# NOTE(review): across(where(is.numeric)) rounds EVERY numeric column
# (including sbjCode) to 1 decimal place -- the old comment claiming
# "except sbjCode to 2 decimal places" was wrong on both counts.
cat_sim_test %>%
 mutate(across(where(is.numeric), ~round(., 1))) |> select(-id,-sim_group,-item_label) |> 
  relocate(item,file, .after=sd) |>
  select(-Phase,-Block) |> 
  rename("Category Similarity" = resp, "CatLearn Accuracy" = Corr) |>
   DT::datatable(options = list(pageLength = 6))
# cat_sim_test %>% # round all numerics except sbjCode to 2 decimal places
#      mutate(across(where(is.numeric), ~round(., 1))) |> select(-id,-sim_group,-item_label) |> 
#      filter(sbjCode==316) |> 
#       relocate(item,file, .after=sd) |>
#       select(-Phase,-Block, -file) |> 
#       rename("Category Similarity" = resp, "CatLearn Accuracy" = Corr) |> pander::pandoc.table(style="rmarkdown",split.table=Inf)
Table 2: Example of how data is structured after combining similarity ratings with CatLearn accuracy. 5 Pattern types * 3 Categories = 15 rows per subject in the 2024 study

Data Inspection & Sanity Checks

Code
# Mean number of ratings collected per prototype set.
avg_set_rating <- mean(setCounts2$n)

# Headline counts: unique raters, unique prototype sets, avg ratings per set.
d |>
  summarize(
    "N Subjects" = n_distinct(sbjCode),
    "N Prototype Sets" = n_distinct(set)
  ) |>
  mutate("Avg Ratings Per Set" = avg_set_rating) |>
  kbl()
Table 3: Current counts of unique subjects, and prototype sets
N Subjects N Prototype Sets Avg Ratings Per Set
71 304 24

Prototype set counts

 

Code
# Distribution of set-inclusion counts, rendered as a gt table
# (how many sets were rated by exactly n participants).
setCounts2 |> group_by(n) |> summarise(nc=n()) |> rename("Number of times prototype set has been included in the study"=n, "Number of prototype sets with this count"=nc) |> gt() |> 
  tab_spanner(label = "Prototype Set Counts") |> 
  tab_header(title = "Prototype Set Counts") |> 
  tab_source_note(
    "Note: The number of times a prototype set has been included in the study is equal to the number of participants who rated the set."
  )
Prototype Set Counts
Number of times prototype set has been included in the study Number of prototype sets with this count
10 1
13 1
15 1
16 6
17 6
18 12
19 15
20 22
21 28
22 22
23 29
24 35
25 21
26 23
27 28
28 15
29 17
30 6
31 7
32 5
33 2
34 1
35 1
Note: The number of times a prototype set has been included in the study is equal to the number of participants who rated the set.
Code
# d |> filter(sbjCode==11) |> select(sbjCode,date,trial,pair_label,set,rt,time_elapsed,time)

# d |> group_by(sbjCode,set) |> 
#   summarize (n=n()) |>
#   gt()

#d |> group_by(sbjCode, item_label_1, item_label_2) |> summarise(n=n())

# (1-.33)^8
# (factorial(8)/(factorial(6)*factorial(8-6))) * (.33^6)*((1-.33)^(8-6))
# (factorial(8)/(factorial(7)*factorial(8-7))) *(.33^6)*((1-.33)^(8-7))
# (factorial(8)/(factorial(8)*factorial(8-8))) *(.33^6)*(1-.33)^(8-8)

# d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
#   group_by(sbjCode, item) |> summarise(n=n())

# patternCounts <- d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
#   group_by(item) |> summarise(n=n(),resp=mean(response),sd=sd(response)) |> arrange(desc(n))



# d |> 
#     pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> select(sbjCode,set,pair_label,item_label,item,response) |>  group_by(set) |>
#     summarize(n=n_distinct(sbjCode)) |> arrange(desc(n)) 




# d |> group_by(sbjCode, file) |> summarise(n=n())
# d |> group_by(sbjCode, set) |> summarise(n=n())

# d |> group_by(sbjCode) |> summarise(n_distinct(file))
# d |> group_by(sbjCode) |> summarise(n_distinct(set))


# sp <- setCounts2 |> 
#   mutate(set=reorder(set,n)) |>
#   ggplot(aes(x=set,y=n)) +
#    geom_col() +
#    theme(legend.title=element_blank(),
#       axis.text.x = element_text(size=5,angle = 90, hjust = 0.5, vjust = 0.5)) +
#     labs(x="Prototype Set", y="Number of Participants to rate set") 

# Histogram of how many times each prototype set has been rated, with the
# count printed above each bar. Fix: `..count..` is deprecated (ggplot2 >= 3.4);
# use after_stat(count) instead.
sh <- setCounts2 |> 
  ggplot(aes(x=n)) + geom_histogram(binwidth = 1) +
  scale_x_continuous(breaks=seq(0, max(setCounts2$n), by = 1)) +
  geom_text(stat="count", aes(label=after_stat(count)), vjust=-0.5) +
  labs(x="Number of times prototype set has been included in the study", 
  y="Number of prototype sets for each count") 


#sp/sh

sh

Prototype set counts

Prototype set counts

 

Rating Distributions

Code
# Aggregate distribution of similarity ratings (1-9 scale).
pgr <- d |> 
  ggplot(aes(x=response))+geom_histogram(binwidth=1) + 
      scale_x_continuous(breaks=seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) + labs(title="Aggregate Rating Distribution", x="Rating", y="Count") 

# Same histogram faceted per subject, to spot idiosyncratic response styles.
pir <- d |>  ggplot(aes(x=response))+
      geom_histogram(binwidth=1) + 
      facet_wrap(~sbjCode) + 
      scale_x_continuous(breaks=seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) + labs(title="Rating Distribution per Sbj.", x="Rating", y="Count") 

pgr/pir

Rating distributions

Rating distributions

Reaction Time Distributions

Code
# Aggregate reaction-time density across all trials.
prtg <- d |> ggplot(aes(x=rt))+
  geom_density() + 
  labs(title="Aggregate Reaction Time Distribution", x="Reaction Time (ms)", y="Density")

# Per-subject RT densities. Fix: spell out `scales=` -- the original `scale=`
# only worked via partial argument matching.
prtid <- d |> ggplot(aes(x=rt))+geom_density() + 
  facet_wrap(~sbjCode,scales="free_x") + labs(title="Reaction Time Distribution per Sbj.", x="Reaction Time (ms)", y="Density")

prtg/prtid

Reaction time distributions

Reaction time distributions

Individual Subject Ratings

Code
# d |> summarize(n=n(), n_distinct(sbjCode), n_distinct(file), n_distinct(set), n_distinct(trial), n_distinct(item_label_1), n_distinct(item_label_2))


# d %>%
#   filter(sbjCode == 11) %>%
#   select(sbjCode, date, trial, set, rt, time) %>%
#   mutate(time_parsed = parse_date_time(paste(date, time), orders = c("mdY IMS p", "mdy IMS p"))) %>%
#   group_by(sbjCode, date) %>%
#   summarise(start_time = min(time_parsed), end_time = max(time_parsed)) %>%
#   mutate(endTimeMinusStart = end_time - start_time)

# Inline response histogram for one subject; embedded per-row in the gt table
# below via text_transform()/ggplot_image().
plot_hist_sbj <- function(id) {
  sbj_rows <- filter(d, sbjCode == id)
  ggplot(sbj_rows, aes(x = response)) +
    geom_histogram(binwidth = 1, fill = 'dodgerblue4') +
    scale_x_continuous(breaks = seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) +
    theme_minimal() +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.x = element_text(size = 26)
    )
}


# Per-subject summary: mean/sd rating, mean RT (seconds), session duration,
# and counts of sets/trials. Response_Distribution holds the sbjCode so the
# gt table below can swap it for an inline histogram.
sbj_sum <- d |> group_by(sbjCode) |> 
#filter(sbjCode<5) |>
  mutate(time_parsed = parse_date_time(paste(date, time), orders = c("mdY IMS p", "mdy IMS p"))) |>
  summarize ("Mean Rating"=mean(response),
  "SD Rating"=sd(response), 
  "Mean RT"=mean(rt)/1000, 
  #"Total Time (min)" = max(time_elapsed)/60000,
  "Total Time (min)" = round(difftime(max(time_parsed), min(time_parsed), units = "mins"),1),
  n_prototype_sets = n_distinct(set), 
  "N Trials" = n_distinct(trial)) |> 
  mutate("Response_Distribution"=sbjCode) 

  # Render the per-subject table with an inline rating histogram per row.
  sbj_sum |> gt() |> 
    text_transform(
    locations = cells_body(columns = 'Response_Distribution'),
    fn = function(column) {
      map(column, plot_hist_sbj) |>
        ggplot_image(height = px(80), aspect_ratio = 3)
    }
    )
# Completion-time summary across subjects ("Completition" label typo fixed).
sbj_sum |> mutate(total_time=as.numeric(`Total Time (min)`)) |> 
  rename("Subject"=sbjCode, "N_Sets" = n_prototype_sets) |> 
  summarize("Median Completion Time (min)"=median(total_time), 
  "Average Completion Time" = mean(total_time), 
  "Min Completion Time (min)" = min(total_time), 
  "Max Completion Time (min)" = max(total_time)) |> kbl()
Table 4: Individual Subject Ratings
sbjCode Mean Rating SD Rating Mean RT Total Time (min) n_prototype_sets N Trials Response_Distribution
2 6.3 2.0 2.22 14.7 101 303
3 4.7 1.6 1.62 11.6 101 303
4 4.5 1.6 3.52 21.1 101 303
5 4.0 2.0 2.02 13.5 101 303
7 5.1 2.8 2.79 17.4 101 303
8 5.0 2.4 2.10 13.9 101 303
9 4.7 2.1 2.81 17.2 101 303
10 4.6 1.7 2.45 15.8 101 303
11 2.8 1.8 5.80 32.4 101 303
12 4.3 2.6 3.76 22.2 101 303
13 5.8 2.4 1.21 9.5 101 303
14 4.5 1.8 3.74 22 101 303
15 5.1 2.3 2.11 14 101 303
16 6.1 2.1 1.62 11.5 101 303
17 5.5 2.7 1.05 8.7 101 303
18 2.5 2.5 2.13 14.2 101 303
19 4.4 2.3 1.91 13 101 303
20 5.7 1.6 2.18 14.4 101 303
21 4.6 2.3 3.93 23.1 101 303
22 4.4 1.6 3.44 20.6 101 303
23 3.3 2.5 2.00 13.5 101 303
24 4.7 2.3 1.51 11.1 101 303
25 5.2 1.6 3.06 18.8 101 303
26 4.7 2.0 1.83 12.6 101 303
27 4.5 2.6 2.69 17 101 303
28 4.7 2.1 6.25 34.9 101 303
29 4.9 2.2 2.75 17.2 101 303
30 5.3 1.9 0.65 6.6 101 303
31 6.6 2.4 1.14 9 101 303
32 4.0 1.8 6.43 35.7 101 303
33 5.1 3.3 1.40 10.3 101 303
34 5.2 2.5 2.77 17.4 101 303
35 4.7 2.2 3.07 18.9 101 303
36 6.3 2.0 1.46 10.7 101 303
37 5.8 1.9 1.42 10.6 101 303
38 5.8 2.4 1.51 10.8 101 303
39 5.7 1.7 2.61 16.2 101 303
40 4.1 2.0 1.97 13.3 101 303
41 5.1 2.4 3.53 21.1 101 303
42 4.8 1.4 3.57 21.4 101 303
43 5.3 2.0 1.82 12.4 101 303
44 3.3 2.6 2.05 13.7 101 303
45 4.6 1.9 3.31 20.1 101 303
46 5.7 1.6 4.92 28 101 303
48 4.7 2.4 3.26 1328 170 303
49 3.7 2.0 3.07 18.6 101 303
50 4.5 1.8 3.05 18.8 101 303
51 4.8 2.7 4.13 24.1 101 303
52 5.2 2.0 3.45 20.7 101 303
53 6.6 1.9 1.74 12.2 101 303
54 4.7 1.7 1.93 13.1 101 303
55 4.7 2.2 3.59 21.4 101 303
56 5.6 1.5 1.89 12.9 101 303
57 4.7 1.7 5.53 31.3 101 303
58 5.4 2.1 1.90 12.9 101 303
59 4.3 1.7 2.40 15.5 101 303
60 6.0 2.5 0.66 6.8 101 303
61 4.9 1.6 2.25 14.3 101 303
62 5.0 2.4 1.73 12 101 303
63 4.7 2.1 3.77 22.3 101 303
64 4.0 2.0 2.31 15.1 101 303
65 4.0 2.0 2.98 18.1 101 303
66 5.0 3.4 3.10 19 101 303
67 4.5 2.5 2.18 14.3 101 303
68 4.1 2.3 2.52 16.1 101 303
69 4.2 2.2 1.90 13 101 303
70 4.3 2.0 2.46 15.8 101 303
71 5.1 1.9 3.49 21 101 303
72 4.7 2.0 2.12 13.8 101 303
73 5.6 2.4 1.39 10.3 101 303
74 5.5 1.9 2.90 17.8 101 303
Median Completion Time (min) Average Completion Time Min Completion Time (min) Max Completion Time (min)
16 35 6.6 1328

Lowest and Highest Rated Pairs

Code
# patternCounts |> filter(n>=8) |>  slice_min(resp)
# patternCounts |> filter(n>=8) |>  slice_max(resp)

# setCounts |> filter(n>=24) |>  slice_min(resp)
# setCounts |> filter(n>=24) |>  slice_max(resp)


# pairCounts |> filter(n>=5) |>  slice_min(resp,n=2)
# pairCounts |> filter(n>=5) |>  slice_max(resp)

# Thresholds: only consider pairs with at least `min_resp` ratings; display
# the `n_show` most extreme pairs. (Fix: use `<-` for assignment, not `=`.)
min_resp <- 7
n_show <- 3

# Lowest-rated (most dissimilar) pairs; one representative row per pair is
# drawn with the shared plot_dotsAll() helper.
d %>% filter(pair_label %in% {pairCounts |> filter(n>=min_resp) |>  
  slice_min(mean_resp,n=n_show, with_ties=FALSE) |> pull(pair_label)} ) |>
  group_by(pair_label) |>
  slice_head(n=1) %>%
  plot_dotsAll() + 
  plot_annotation(title=glue::glue("Lowest rated pairs ( out of sets with n>={min_resp} ratings)"), theme = theme(plot.title = element_text(hjust = 0.4)))

Code
# Highest-rated (most similar) pairs among pairs with >= min_resp ratings;
# one representative row per pair, drawn with the shared plot_dotsAll() helper.
d %>% filter(pair_label %in% {pairCounts |> filter(n>=min_resp) |>  
  slice_max(mean_resp,n=n_show, with_ties=FALSE) |> pull(pair_label)} ) |>
  group_by(pair_label) |>
  slice_head(n=1) %>%
  plot_dotsAll() +  
  plot_annotation(title=glue::glue("Highest rated pairs ( out of sets with n>={min_resp} ratings)"), theme = theme(plot.title = element_text(hjust = 0.4)))

Pattern Pair Table

All pairs with >=25 ratings

  • click on column headers to change sort order
    • e.g. clicking on “Mean Rating” will toggle showing the pairs rated most similar or most dissimilar
    • clicking on “SD” will toggle showing the pairs with the most or least agreement in ratings
  • note your screen may need to be at full width to see all columns
Code
# Inline response histogram for one pattern pair; embedded per-row in the
# interactive gt table below.
plot_hist_pair <- function(Pair) {
  pair_rows <- filter(d, pair_label == Pair)
  ggplot(pair_rows, aes(x = response)) +
    geom_histogram(binwidth = 1, fill = 'dodgerblue4') +
    scale_x_continuous(breaks = seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) +
    theme_minimal() +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.x = element_text(size = 26)
    )
}



# Render the two dot patterns of a pair side by side (embedded in the gt table).
pat_table_plot <- function(Pair){

  # One representative row for this pair (the stored patterns do not vary by rater).
  df <- d |> filter(pair_label==Pair) |> slice_head(n=1) 

    # Parse the JSON-encoded dot coordinates for each pattern; y is negated so
    # the plot matches the on-screen orientation.
    pat1 <- df %>%
          mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>%
          unnest(pattern_1) %>%
          mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat)

    pat2 <- df %>%
          mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>%
          unnest(pattern_2) %>%
          mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat)

    pat <- rbind(pat1,pat2)

     # Fix: alpha must lie in [0, 1]; the original alpha=2 was silently clamped to 1.
     pat |> 
    ggplot(aes(x = x, y = y,fill=pat,col=pat)) +
          geom_point(alpha=1) +
          coord_cartesian(xlim = c(-25, 35.5), ylim = c(-25, 25)) +
          theme_minimal() +
          facet_wrap2(~pat,ncol=2,axes="all") + 
          #theme_blank +
          theme_void() + 
          theme(strip.text = element_text(size = 7,hjust=.5),
                panel.spacing.x=unit(-7.3, "lines"), 
                #strip.background = element_rect(colour = "black", linewidth = 2),
                legend.position = "none",
        axis.line.y = element_line(colour = "black", linewidth = .1)) 
}



# Pairs with enough (>= 32) ratings to display in the interactive table.
# NOTE(review): this reuses (shadows) the plot object name `p5` from earlier chunks.
p5 <- pairCounts |> filter(n>=32) 

# Interactive gt table: each row shows the dot-pattern pair (pat_table_plot)
# and its rating histogram (plot_hist_pair), rendered as inline images.
p5 |> 
arrange(mean_resp) |>
relocate(pair_label,.after=sd) |>
rename("Pair"=pair_label, "N"=n, "Mean Rating"=mean_resp, "SD"=sd) |>
mutate(Rating_Dist=Pair) |> 
#group_by(Pair) |> 
gt() |> 
tab_options(table.font.size = px(8L)) |>
  cols_width(
    set ~ px(116),
    Pair ~ px(415),
    `N` ~ px(50),
    `Mean Rating` ~ px(90),
    SD ~ px(55)
  ) |>  
  fmt_number(decimals = 1) |> #fmt_integer() |>
  cols_align('left', columns = set) |> 
  # Replace the Pair label with the rendered dot-pattern image.
  text_transform(
    locations = cells_body(columns = Pair),
    fn = function(column) {
      map(column, pat_table_plot) |>
        ggplot_image(height = px(230), aspect_ratio = 1.8)
    }
  ) |>
  # Replace the Rating_Dist label with the rating histogram image.
  text_transform(
    locations = cells_body(columns = Rating_Dist),
    fn = function(column) {
      map(column, plot_hist_pair) |>
        ggplot_image(height = px(150), aspect_ratio = 1)
    }
  ) |>  
   opt_interactive(page_size_default=5, 
     use_page_size_select= TRUE, use_search=TRUE, use_resizers=TRUE,use_filters=TRUE, page_size_values = c(5, 10, 25, 50, 100)) 
Code
# #| fig-cap: dot plots
# #| fig-width: 10
# #| fig-height: 12

# d %>% filter(trial==1) %>%
#   plot_dots()

# d %>% filter(trial==1) %>%
#   plot_dots2()

# d %>% filter(trial<2) %>%
#   plot_dotsAll()
Code
#| fig-width: 6
#| fig-height: 9


# d %>% filter(file %in% unique(d$file[1])) %>%
#   plot_dotsAll()

# plot_dotsAll_orig <- function(df) {
#   plots <- list()

#   for (i in 1:nrow(df)) {
#     p1 <- df[i, ] %>%
#       pivot_longer(cols = starts_with("x"), names_to = "dot", values_to = "x") %>%
#       mutate(dot = as.numeric(str_remove(dot, "x"))) %>%
#       pivot_longer(cols = starts_with("y"), names_to = "dot2", values_to = "y") %>%
#       mutate(dot2 = as.numeric(str_remove(dot2, "y"))) %>%
#       filter(dot == dot2) %>%
#       ggplot(aes(x = x, y = y)) +
#       geom_point() +
#       coord_cartesian(xlim = c(-25, 25), ylim = c(-25, 25)) +
#       theme_minimal() +
#       labs(title = df$id[i]) + theme_blank

#     plots <- append(plots, list(p1))
#   }

#   patchwork::wrap_plots(plots, ncol = 1)
# }

# mc24_proto |> filter(file %in% unique(d$file[1])) %>% plot_dotsAll_orig()
Source Code
---
title: Dot Pattern Similarity
date: last-modified
lightbox: true
toc: true
page-layout: full
toc-depth: 3
code-fold: true
code-tools: true
execute: 
  warning: false
  eval: true
---


- catLearn accuracy refers to accuracy on the categorization task in the Hu & Nosofsky 2024 study.
- prototype sets refer to sets of 3 prototypes that correspond to a single participant in the Hu & Nosofsky 2024 study. 
- prototype pairs refer to the specific pairs of prototypes that are displayed together on a rating trial (3 pairs per set). 


```{r}
pacman::p_load(dplyr,purrr,tidyr,ggplot2, here, patchwork, 
  conflicted, jsonlite,stringr, gt, knitr, kableExtra, 
  lubridate,ggh4x, lmerTest)
walk(c("dplyr", "lmerTest"), conflict_prefer_all, quiet = TRUE)
options(digits=2, scipen=999, dplyr.summarise.inform=FALSE)
walk(c("fun_plot"), ~ source(here::here(paste0("R/", .x, ".R"))))
mc24_proto <- read.csv(here("Stimulii","mc24_prototypes.csv")) |> mutate(set=paste0(sbjCode,"_",condit)) 
sbj_cat <- read.csv(here("data","mc24_sbj_cat.csv"))

# All similarity-rating data files (one CSV per participant session).
dfiles <- list(path=list.files(here::here("data/dotSim_data"),full.names=TRUE))

# Read and combine every data file once, tagging rows with their source file.
# (A redundant first `map_dfr(..., read.csv)` pass, whose result was
# immediately overwritten, has been removed -- it read every file twice.)
d <- map_dfr(dfiles$path, ~{read.csv(.x) |> 
    mutate(sfile=tools::file_path_sans_ext(basename(.x)))}) |> 
  select(-trial_index, -internal_node_id,-trial_type) |>
  # Derive the prototype-set id ("<sbjCode>_<condit>") from the first item label.
   mutate(set = paste(str_extract(item_label_1, "^\\d+"),
                     str_extract(item_label_1, "[a-z]+"), sep = "_")) |>
  mutate(pair_label = paste0(item_label_1,"_",item_label_2)) |>
  relocate(sbjCode,date,set,pair_label,trial,item_label_1,item_label_2,response,rt)

# Per-set summaries: number of distinct raters plus mean/sd of responses.
setCounts <- d |> 
  pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
  group_by(set) |> summarise(n=n_distinct(sbjCode),resp=mean(response),sd=sd(response)) |> arrange(desc(n))

# length(unique(mc_proto$set)) # 304
# One row per prototype set with its rating count (0 when never rated).
# Fix: `.groups="drop"` is not a mutate() argument -- the original silently
# created a literal ".groups" column; grouping is dropped via ungroup() instead.
setCounts2 <- mc24_proto |> group_by(set) |> 
  slice_head(n=1) |> 
  select(id,file,set) |> 
  left_join(setCounts,by="set") |> 
  mutate(n = ifelse(is.na(n), 0, n)) |> 
  arrange(n) |> ungroup()

# Per-pair summaries: rating count and mean/sd response (3 pairs per set).
pairCounts <- d |> 
  group_by(pair_label,set) |> 
  summarise(n=n(),mean_resp=mean(response),sd=sd(response)) |> arrange(desc(n)) |> ungroup()



# Average rating each individual pattern received across all pairs it appeared in.
patternAvg <- d |> 
  pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
  group_by(item,file) |> 
  summarise(n_rating=n(),resp=mean(response),sd=sd(response)) |> 
  arrange(desc(n_rating))

# Merge similarity ratings onto per-subject categorization accuracy, then bin
# ratings into extreme similarity groups (cutoffs: <3.5 dissimilar, >6.0 similar).
cat_sim <- sbj_cat |> 
  mutate(item=item_label) |> 
  left_join(patternAvg,by=c("file","item"))  |> arrange(desc(n_rating)) |>
  #remove rows where n_rating is NA, or less than 12
  filter(!is.na(n_rating),n_rating>=12) |> 
  mutate(sim_group = ifelse(resp>6.0,"Very Similar",ifelse(resp<3.5,"Very Dissimilar","Medium"))) |> 
  mutate(sim_group=factor(sim_group,levels=c("Very Dissimilar","Medium","Very Similar"))) 



# Test-phase (Phase 2) subset used for the accuracy analyses below.
cat_sim_test <- cat_sim |> 
  filter(Phase==2) 


#cor(cat_sim$resp,cat_sim$Corr)

#  m1 <- lmer(Corr ~ resp + (1|sbjCode), data=cat_sim)
#  summary(m1)

#  m1 <- lmer(Corr ~ resp + (1|Pattern.Type) +  (1|sbjCode), data=cat_sim)
#  summary(m1)

#  m1 <- lmer(Corr ~ resp*Pattern.Type*condit +  (1|sbjCode), data=cat_sim)
#  summary(m1)


# m1 <- lmer(Corr ~ sim_group +  (1|sbjCode), data=cat_sim)
# summary(m1)

# m1 <- lmer(Corr ~ sim_group*condit +  (1|sbjCode), data=cat_sim)
# summary(m1)

# m1 <- lmer(Corr ~ sim_group*condit*Pattern.Type +  (1|sbjCode), data=cat_sim)
# summary(m1)





```





## Correlations with Accuracy in Hu & Nosofsky 2024



#### Assess # of patterns in various binnings - e.g. quartile, decile
```{r}
# Summarise mean/SD rating and number of distinct stimulus files within each
# bin; shared by the quartile, decile, and extreme-group tables below.
summarise_bins <- function(df, bin_col) {
  df |>
    group_by({{ bin_col }}) |>
    summarize("Avg. Similarity Rating" = mean(resp), sd = sd(resp),
              n_ratings = n_distinct(file), .groups = "drop")
}

# Bin data by rating (resp) into quartiles, deciles, and extreme groups.
t1 <- cat_sim |> mutate(Quartile = ntile(resp, 4)) |> summarise_bins(Quartile)
t2 <- cat_sim |> mutate(Decile = ntile(resp, 10)) |> summarise_bins(Decile)
t3 <- cat_sim |> summarise_bins(sim_group)

t1 |> kbl(caption="Quartiles")
t2 |> kbl(caption="Deciles")
t3 |> kbl(caption="Extreme Groups")
```







### Relationship between catLearn accuracy and binnings of similarity rating

- generally a negative correlation - higher similarity rating -> lower accuracy
- quartiles and deciles are roughly evenly grouped bins, while the extreme similarity grouping has the bulk of the ratings in the medium group

#### Quartiles and Deciles
```{r}
#| label: fig-sim-acc-test
#| fig-cap: "2024 CatLearn accuracy by different similarity rating groups"
#| fig-width: 12
#| fig-height: 10

# CatLearn test accuracy binned by similarity quartile/decile, split by
# pattern type, training condition, and both combined.

# Quartile x Pattern Type
p3 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_wrap(~Pattern.Type) + 
  labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Pattern Type",fill="Similarity Rating Quartile")

# Decile x Pattern Type
p4 <- cat_sim_test |> 
  mutate(Decile = as.factor(ntile(resp, 10))) |> 
  ggplot(aes(x=Decile,y=Corr,fill=Decile)) + 
  stat_bar + 
  facet_wrap(~Pattern.Type) + 
  labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Pattern Type", fill="Similarity Rating Decile")

# Quartile x Training Condition
p5 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Training Condition", fill="Similarity Rating Quartile")

# Decile x Training Condition
p6 <- cat_sim_test |> 
  mutate(Decile = as.factor(ntile(resp, 10))) |> 
  ggplot(aes(x=Decile,y=Corr,fill=Decile)) + 
  stat_bar + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Training Condition", fill="Similarity Rating Decile")

# Quartile x Condition x Pattern Type (nested facets). The x-axis label
# previously read "Quintile" although the bins are quartiles (ntile(resp, 4)).
p7 <- cat_sim_test |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Quartile,y=Corr,fill=Quartile)) + 
  stat_bar + 
  facet_nested_wrap(~condit+Pattern.Type) + labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Training Condition and Pattern Type")

p3 + p4
p5 + p6
p7



```


#### Extreme similarity rating groups
```{r}
#| label: fig-sim-acc-test2
#| fig-cap: "2024 CatLearn accuracy by different similarity rating groups"
#| fig-width: 13
#| fig-height: 15

# Accuracy by pattern type, colored by extreme-similarity group
p9 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=Pattern.Type, fill=sim_group)) + 
  stat_bar + labs(title="Group by Pattern Type",y="CatLearn Accuracy", fill="Similarity Rating Group")

# Accuracy by training condition, colored by extreme-similarity group
p10 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=condit, fill=sim_group)) + 
  stat_bar + labs(title="Group by Condit",y="CatLearn Accuracy", fill="Similarity Rating Group")

# Accuracy by similarity group, faceted by condition and pattern type
p11 <- cat_sim_test |> 
  ggplot(aes(y=Corr,x=sim_group, fill=sim_group)) + 
  stat_bar + 
  facet_nested_wrap(~condit+Pattern.Type) +
  labs(title="Separate by Condit and Pattern Type",y="CatLearn Accuracy", fill="Similarity Rating Group") +
  theme(axis.text.x = element_text(size=5,angle = 45, hjust = 0.5, vjust = 0.5))

# Combined layout: two single-facet panels on top, nested facets below
 (p9 / p10)/p11 + plot_annotation(title="CatLearn accuracy by extreme similarity rating groups") +
  plot_layout(heights = c(1,1,2))
```



::: {.panel-tabset}

### learning curves
```{r}
#| label: fig-train-sim
#| fig-cap: "Training accuracy by block and similarity rating" 
#| fig-width: 10
#| fig-height: 9


# trSim1 <- cat_sim |> filter(Phase==1) |> 
#   group_by(condit,Block) |> 
#   mutate(Quartile = as.factor(ntile(resp, 4))) |> 
#   ggplot(aes(x=Block,y=Corr, fill=Quartile)) + stat_bar+  facet_wrap(~condit)  +
#    labs(x="Training Block", y="CatLearn Accuracy",fill="Similarity Rating Quartile")


# trSim2 <- cat_sim |> filter(Phase==1) |> 
#   group_by(condit,Block) |> 
#   ggplot(aes(x=Block,y=Corr, fill=sim_group)) +
#    stat_bar+  
#    facet_wrap(~condit) +
#   labs(x="Training Block", y="CatLearn Accuracy",fill="Similarity Rating Group")




# Block-wise training accuracy curves. The legend titles are now passed to
# labs() via `col` — the aesthetic that is actually mapped — rather than
# `fill`, which previously had no legend to title.
trSim1 <- cat_sim |> filter(Phase==1) |> 
  group_by(condit,Block) |> 
  mutate(Quartile = as.factor(ntile(resp, 4))) |> 
  ggplot(aes(x=Block,y=Corr, col=Quartile)) + 
    stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap(~condit)  +
   labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Quartile",title = "Training Accuracy by Block and Similarity Quartile")

trSim2 <- cat_sim |> filter(Phase==1) |> 
  group_by(condit,Block) |> 
  ggplot(aes(x=Block,y=Corr, col=sim_group)) +
      stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap(~condit) +
  labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Group", title="Training Accuracy by Block and Extreme Similarity Grouping")


trSim1/trSim2


```

### Individual Learning Curves
```{r}
#| label: fig-train-sim-indv
#| fig-cap: "Training accuracy by block and similarity rating" 
#| fig-width: 12
#| fig-height: 15

 # Per-subject training curves, restricted to the two extreme similarity
 # groups. The legend title is now set via `col` (the mapped aesthetic) rather
 # than `fill`; an unused Quartile column computation was removed.
 trIndv <- cat_sim |> filter(Phase==1, !(sim_group=="Medium")) |> 
  group_by(condit,Block) |> 
  ggplot(aes(x=Block,y=Corr, col=sim_group)) + 
      stat_summary(shape=0,geom="point",fun="mean")+
  stat_summary(geom="line",fun="mean")+ 
   facet_wrap2(~condit+sim_group+sbjCode) +
  labs(x="Training Block", y="CatLearn Accuracy",col="Similarity Rating Group", title="Training Accuracy by Block and Extreme Similarity Grouping")

trIndv 

#cat_sim |> filter(Phase==1) |> group_by(id) |> summarize(n_sim = n_distinct(sim_group)) |> arrange(desc(n_sim)) |> head(10)

# cat_sim |> filter(Phase==1) |> group_by(id) |> mutate(n_sim = n_distinct(sim_group)) |> filter(n_sim>1)  |> group_by(condit,Block) |> 
#   ggplot(aes(x=Block,y=Corr, col=sim_group)) + 
#       stat_summary(shape=0,geom="point",fun="mean")+
#   stat_summary(geom="line",fun="mean")+ 
#    facet_wrap2(~condit+sbjCode)


```

:::

### Correlations between similarity ratings and CatLearn accuracy

```{r}
#| label: fig-sim-acc-corr
#| fig-cap: "Correlations between similarity ratings and CatLearn accuracy" 
#| fig-width: 11
#| fig-height: 9


# Scatter + linear fits of accuracy vs. raw similarity rating, by pattern type
p1 <- cat_sim_test |> ggplot(aes(x=resp,y=Corr,col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating")

# Same relationship, faceted by training condition
p2 <- cat_sim_test |> ggplot(aes(x=resp,y=Corr, col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~condit) + labs(y="CatLearn Accuracy", x="Similarity Rating")

# Accuracy vs. similarity decile, by pattern type
p3 <- cat_sim_test |> 
mutate(Decile = ntile(resp, 10)) |> 
ggplot(aes(x=Decile,y=Corr, col=Pattern.Type,fill=Pattern.Type)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating Decile")

# Accuracy vs. similarity decile, by training condition
p4 <- cat_sim_test |> 
mutate(Decile = ntile(resp, 10)) |> 
ggplot(aes(x=Decile,y=Corr, col=condit,fill=condit)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
 labs(y="CatLearn Accuracy", x="Similarity Rating Decile", fill="Training Condition", col="Training Condition")

# 2x2 layout of the four correlation panels
(p1+p2) / (p3+p4)
```


#### Structure of dataframe after merging similarity ratings with CatLearn accuracy

- Each subject in the 2024 study has a similarity score for each of their 3 categories (averaged over 2 comparisons with that category's prototype).
- The same category similarity scores are then compared to their accuracy for each of the 5 Pattern Types (old, prototype, new low, new med, new high)
- 5 Pattern types * 3 Categories = 15 comparisons per subject

::: {#tbl-ee-brms1}
| sbjCode | condit | Category | Pattern.Type | n | CatLearn Accuracy | n_rating | Category Similarity | sd  |      item      |
|:-------:|:------:|:--------:|:------------:|:-:|:-----------------:|:--------:|:-------------------:|:---:|:--------------:|
|   316   |  high  |    1     |  prototype   | 1 |         1         |    60    |         5.1         | 2.6 | 316_high_1_285 |
|   316   |  high  |    1     |     old      | 9 |        0.4        |    60    |         5.1         | 2.6 | 316_high_1_285 |
|   316   |  high  |    1     |   new_low    | 3 |        0.3        |    60    |         5.1         | 2.6 | 316_high_1_285 |
|   316   |  high  |    1     |   new_med    | 6 |        0.3        |    60    |         5.1         | 2.6 | 316_high_1_285 |
|   316   |  high  |    1     |   new_high   | 9 |        0.4        |    60    |         5.1         | 2.6 | 316_high_1_285 |
|   316   |  high  |    2     |  prototype   | 1 |         0         |    60    |         3.8         |  2  | 316_high_2_327 |
|   316   |  high  |    2     |     old      | 9 |        0.7        |    60    |         3.8         |  2  | 316_high_2_327 |
|   316   |  high  |    2     |   new_low    | 3 |        0.7        |    60    |         3.8         |  2  | 316_high_2_327 |
|   316   |  high  |    2     |   new_med    | 6 |        0.2        |    60    |         3.8         |  2  | 316_high_2_327 |
|   316   |  high  |    2     |   new_high   | 9 |        0.2        |    60    |         3.8         |  2  | 316_high_2_327 |
|   316   |  high  |    3     |  prototype   | 1 |         0         |    60    |         5.2         | 2.4 | 316_high_3_287 |
|   316   |  high  |    3     |     old      | 9 |        0.1        |    60    |         5.2         | 2.4 | 316_high_3_287 |
|   316   |  high  |    3     |   new_low    | 3 |        0.7        |    60    |         5.2         | 2.4 | 316_high_3_287 |
|   316   |  high  |    3     |   new_med    | 6 |         0         |    60    |         5.2         | 2.4 | 316_high_3_287 |
|   316   |  high  |    3     |   new_high   | 9 |        0.3        |    60    |         5.2         | 2.4 | 316_high_3_287 |

Example of how the data is structured after combining similarity ratings with CatLearn accuracy. 5 Pattern types * 3 Categories = 15 rows per subject in the 2024 study
{.striped .hover .sm}
::: 



```{r}
#| label: tbl-htw-modelError-e1
#| tbl-cap: "Example of how data is structure after combining similarity ratings with CatLearn accuracy. 5 Pattern types * 3 Categories = 15 rows per subject in the 2024 study"
#| eval: true

# Round numeric columns for display, drop bookkeeping columns, and show an
# interactive table of the merged similarity/accuracy data.
cat_sim_test |>
  mutate(across(where(is.numeric), \(v) round(v, 1))) |>
  select(-id, -sim_group, -item_label) |>
  relocate(item, file, .after = sd) |>
  select(-Phase, -Block) |>
  rename("Category Similarity" = resp, "CatLearn Accuracy" = Corr) |>
  DT::datatable(options = list(pageLength = 6))

# cat_sim_test %>% # round all numerics except sbjCode to 2 decimal places
#      mutate(across(where(is.numeric), ~round(., 1))) |> select(-id,-sim_group,-item_label) |> 
#      filter(sbjCode==316) |> 
#       relocate(item,file, .after=sd) |>
#       select(-Phase,-Block, -file) |> 
#       rename("Category Similarity" = resp, "CatLearn Accuracy" = Corr) |> pander::pandoc.table(style="rmarkdown",split.table=Inf)
```







## Data Inspection & Sanity Checks




```{r}
#| label: tbl-totals
#| tbl-cap: "Current counts of unique subjects, and prototype sets"

# Average number of ratings collected per prototype set.
avg_set_rating <- mean(setCounts2$n)

# Headline counts: unique raters, unique prototype sets, avg ratings per set.
d |>
  summarize("N Subjects" = n_distinct(sbjCode),
            "N Prototype Sets" = n_distinct(set)) |>
  mutate("Avg Ratings Per Set" = avg_set_rating) |>
  kbl()

```



### Prototype set counts

\ \

```{r}
# gt table: how many prototype sets have been rated 0, 1, 2, ... times
setCounts2 |> group_by(n) |> summarise(nc=n()) |> rename("Number of times prototype set has been included in the study"=n, "Number of prototype sets with this count"=nc) |> gt() |> 
  tab_spanner(label = "Prototype Set Counts") |> 
  tab_header(title = "Prototype Set Counts") |> 
  tab_source_note(
    "Note: The number of times a prototype set has been included in the study is equal to the number of participants who rated the set."
  )
```


```{r}
#| fig-cap: Prototype set counts
#| fig-width: 12
#| fig-height: 9



# d |> filter(sbjCode==11) |> select(sbjCode,date,trial,pair_label,set,rt,time_elapsed,time)

# d |> group_by(sbjCode,set) |> 
#   summarize (n=n()) |>
#   gt()

#d |> group_by(sbjCode, item_label_1, item_label_2) |> summarise(n=n())

# (1-.33)^8
# (factorial(8)/(factorial(6)*factorial(8-6))) * (.33^6)*((1-.33)^(8-6))
# (factorial(8)/(factorial(7)*factorial(8-7))) *(.33^6)*((1-.33)^(8-7))
# (factorial(8)/(factorial(8)*factorial(8-8))) *(.33^6)*(1-.33)^(8-8)

# d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
#   group_by(sbjCode, item) |> summarise(n=n())

# patternCounts <- d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
#   group_by(item) |> summarise(n=n(),resp=mean(response),sd=sd(response)) |> arrange(desc(n))



# d |> 
#     pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> select(sbjCode,set,pair_label,item_label,item,response) |>  group_by(set) |>
#     summarize(n=n_distinct(sbjCode)) |> arrange(desc(n)) 




# d |> group_by(sbjCode, file) |> summarise(n=n())
# d |> group_by(sbjCode, set) |> summarise(n=n())

# d |> group_by(sbjCode) |> summarise(n_distinct(file))
# d |> group_by(sbjCode) |> summarise(n_distinct(set))


# sp <- setCounts2 |> 
#   mutate(set=reorder(set,n)) |>
#   ggplot(aes(x=set,y=n)) +
#    geom_col() +
#    theme(legend.title=element_blank(),
#       axis.text.x = element_text(size=5,angle = 90, hjust = 0.5, vjust = 0.5)) +
#     labs(x="Prototype Set", y="Number of Participants to rate set") 

# Histogram of how many times each prototype set has been rated, with the
# count printed above each bar.
sh <- setCounts2 |> 
  ggplot(aes(x=n)) + geom_histogram(binwidth = 1) +
  scale_x_continuous(breaks=seq(0, max(setCounts2$n), by = 1)) +
  # after_stat(count) replaces the deprecated ..count.. notation (ggplot2 >= 3.4)
  geom_text(stat="count", aes(label=after_stat(count)), vjust=-0.5) +
  labs(x="Number of times prototype set has been included in the study", 
  y="Number of prototype sets for each count") 


#sp/sh

sh

```



\ \

### Rating Distributions


```{r}
#| fig-cap: Rating distributions
#| fig-width: 12
#| fig-height: 14

# Aggregate histogram of all ratings (1-9 scale)
pgr <- d |> 
  ggplot(aes(x=response))+geom_histogram(binwidth=1) + 
      scale_x_continuous(breaks=seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) + labs(title="Aggregate Rating Distribution", x="Rating", y="Count") 

# Per-subject rating histograms (one facet per subject)
pir <- d |>  ggplot(aes(x=response))+
      geom_histogram(binwidth=1) + 
      facet_wrap(~sbjCode) + 
      scale_x_continuous(breaks=seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) + labs(title="Rating Distribution per Sbj.", x="Rating", y="Count") 

pgr/pir

```


### Reaction Time Distributions

```{r}
#| fig-cap: Reaction time distributions
#| fig-width: 12
#| fig-height: 14


# Pooled reaction-time density across all subjects.
prtg <- ggplot(d, aes(x = rt)) +
  geom_density() +
  labs(title = "Aggregate Reaction Time Distribution",
       x = "Reaction Time (ms)", y = "Density")

# One RT density per subject, with a free x scale per panel.
prtid <- ggplot(d, aes(x = rt)) +
  geom_density() +
  facet_wrap(~sbjCode, scale = "free_x") +
  labs(title = "Reaction Time Distribution per Sbj.",
       x = "Reaction Time (ms)", y = "Density")

prtg / prtid

```


### Individual Subject Ratings

```{r}
#| label: tbl-indv-sbj
#| tbl-cap: "Individual Subject Ratings "


# d |> summarize(n=n(), n_distinct(sbjCode), n_distinct(file), n_distinct(set), n_distinct(trial), n_distinct(item_label_1), n_distinct(item_label_2))


# d %>%
#   filter(sbjCode == 11) %>%
#   select(sbjCode, date, trial, set, rt, time) %>%
#   mutate(time_parsed = parse_date_time(paste(date, time), orders = c("mdY IMS p", "mdy IMS p"))) %>%
#   group_by(sbjCode, date) %>%
#   summarise(start_time = min(time_parsed), end_time = max(time_parsed)) %>%
#   mutate(endTimeMinusStart = end_time - start_time)

# Histogram of one subject's rating responses (1-9 scale); rendered as an
# inline image inside the subject-summary gt table below.
plot_hist_sbj <- function(id) {
  sbj_rows <- filter(d, sbjCode == id)
  ggplot(sbj_rows, aes(x = response)) +
    geom_histogram(binwidth = 1, fill = "dodgerblue4") +
    scale_x_continuous(breaks = seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) +
    theme_minimal() +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 26))
}


# Per-subject summary: rating mean/SD, mean RT (seconds), session duration
# derived from parsed timestamps, and counts of sets/trials completed.
sbj_sum <- d |> group_by(sbjCode) |> 
  mutate(time_parsed = parse_date_time(paste(date, time), orders = c("mdY IMS p", "mdy IMS p"))) |>
  summarize ("Mean Rating"=mean(response),
  "SD Rating"=sd(response), 
  "Mean RT"=mean(rt)/1000, 
  # duration = last timestamp minus first timestamp, in minutes
  "Total Time (min)" = round(difftime(max(time_parsed), min(time_parsed), units = "mins"),1),
  n_prototype_sets = n_distinct(set), 
  "N Trials" = n_distinct(trial)) |> 
  mutate("Response_Distribution"=sbjCode) 

  # gt table with an inline response-distribution histogram per subject
  sbj_sum |> gt() |> 
    text_transform(
    locations = cells_body(columns = 'Response_Distribution'),
    fn = function(column) {
      map(column, plot_hist_sbj) |>
        ggplot_image(height = px(80), aspect_ratio = 3)
    }
    )
  

# Aggregate completion-time statistics across subjects
# (fixed user-facing typo: "Completition" -> "Completion").
sbj_sum |> mutate(total_time=as.numeric(`Total Time (min)`)) |> 
  rename("Subject"=sbjCode, "N_Sets" = n_prototype_sets) |> 
  summarize("Median Completion time (min)"=median(total_time), 
  "Average Completion Time" = mean(total_time), 
  "Min Completion Time (min)" = min(total_time), 
  "Max Completion Time (min)" = max(total_time)) |> kbl()

```




## Lowest and Highest Rated Pairs

```{r}
#| fig-width: 11
#| fig-height: 9

# patternCounts |> filter(n>=8) |>  slice_min(resp)
# patternCounts |> filter(n>=8) |>  slice_max(resp)

# setCounts |> filter(n>=24) |>  slice_min(resp)
# setCounts |> filter(n>=24) |>  slice_max(resp)


# pairCounts |> filter(n>=5) |>  slice_min(resp,n=2)
# pairCounts |> filter(n>=5) |>  slice_max(resp)

min_resp=7
n_show=3

# Lowest-rated pairs among pairs with at least min_resp ratings.
# (Titles now say "pairs": the filter is over pairCounts, not sets.)
d %>% filter(pair_label %in% {pairCounts |> filter(n>=min_resp) |>  
  slice_min(mean_resp,n=n_show, with_ties=FALSE) |> pull(pair_label)} ) |>
  group_by(pair_label) |>
  slice_head(n=1) %>%
  plot_dotsAll() + 
  plot_annotation(title=glue::glue("Lowest rated pairs ( out of pairs with n>={min_resp} ratings)"), theme = theme(plot.title = element_text(hjust = 0.4)))

# Highest-rated pairs among pairs with at least min_resp ratings.
d %>% filter(pair_label %in% {pairCounts |> filter(n>=min_resp) |>  
  slice_max(mean_resp,n=n_show, with_ties=FALSE) |> pull(pair_label)} ) |>
  group_by(pair_label) |>
  slice_head(n=1) %>%
  plot_dotsAll() +  
  plot_annotation(title=glue::glue("Highest rated pairs ( out of pairs with n>={min_resp} ratings)"), theme = theme(plot.title = element_text(hjust = 0.4)))


```


## Pattern Pair Table
**All pairs with >=25 ratings**

- click on column headers to change sort order
  - e.g. clicking on "Mean Rating" will toggle showing the pairs rated most similar or most dissimilar
  - clicking on "SD" will toggle showing the pairs with the most or least agreement in ratings
- **note your screen may need to be at full width to see all columns**

::: column-page-right

```{r}
# Histogram of all ratings given to one stimulus pair (1-9 scale); rendered
# as an inline image in the pattern-pair gt table.
plot_hist_pair <- function(Pair) {
  pair_rows <- filter(d, pair_label == Pair)
  ggplot(pair_rows, aes(x = response)) +
    geom_histogram(binwidth = 1, fill = "dodgerblue4") +
    scale_x_continuous(breaks = seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) +
    theme_minimal() +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 26))
}



# Render one stimulus pair (two dot patterns side by side) for embedding in
# the gt pattern-pair table. `Pair` is a pair_label string present in `d`.
pat_table_plot <- function(Pair){

  # one representative row for this pair (patterns repeat across rating rows)
  df <- d |> filter(pair_label==Pair) |> slice_head(n=1) 

    # parse the JSON-encoded dot coordinates for each pattern; flip y so the
    # rendered orientation matches the on-screen stimulus
    pat1 <- df %>%
          mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>%
          unnest(pattern_1) %>%
          mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat)

    pat2 <- df %>%
          mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>%
          unnest(pattern_2) %>%
          mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat)


    pat <- rbind(pat1,pat2)

     pat |> 
    ggplot(aes(x = x, y = y,fill=pat,col=pat)) +
          # alpha must lie in [0, 1]; the previous alpha=2 was out of range
          geom_point(alpha=1) +
          coord_cartesian(xlim = c(-25, 35.5), ylim = c(-25, 25)) +
          theme_minimal() +
          facet_wrap2(~pat,ncol=2,axes="all") + 
          #theme_blank +
          theme_void() + 
          theme(strip.text = element_text(size = 7,hjust=.5),
                panel.spacing.x=unit(-7.3, "lines"), 
                #strip.background = element_rect(colour = "black", linewidth = 2),
                legend.position = "none",
        axis.line.y = element_line(colour = "black", linewidth = .1)) 
}



# Only include pairs that have accumulated a large number of ratings
p5 <- pairCounts |> filter(n>=32) 

# Interactive gt table: one row per pair with a rendered dot-pattern image
# and a rating histogram; columns are sortable/filterable.
p5 |> 
arrange(mean_resp) |>
relocate(pair_label,.after=sd) |>
rename("Pair"=pair_label, "N"=n, "Mean Rating"=mean_resp, "SD"=sd) |>
mutate(Rating_Dist=Pair) |> 
#group_by(Pair) |> 
gt() |> 
tab_options(table.font.size = px(8L)) |>
  cols_width(
    set ~ px(116),
    Pair ~ px(415),
    `N` ~ px(50),
    `Mean Rating` ~ px(90),
    SD ~ px(55)
  ) |>  
  fmt_number(decimals = 1) |> #fmt_integer() |>
  cols_align('left', columns = set) |> 
  # replace each pair label with its rendered side-by-side dot patterns
  text_transform(
    locations = cells_body(columns = Pair),
    fn = function(column) {
      map(column, pat_table_plot) |>
        ggplot_image(height = px(230), aspect_ratio = 1.8)
    }
  ) |>
  # replace the duplicated label column with a per-pair rating histogram
  text_transform(
    locations = cells_body(columns = Rating_Dist),
    fn = function(column) {
      map(column, plot_hist_pair) |>
        ggplot_image(height = px(150), aspect_ratio = 1)
    }
  ) |>  
   opt_interactive(page_size_default=5, 
     use_page_size_select= TRUE, use_search=TRUE, use_resizers=TRUE,use_filters=TRUE, page_size_values = c(5, 10, 25, 50, 100)) 
    

```

:::










<!-- ### Plot Pairs -->
```{r}
# #| fig-cap: dot plots
# #| fig-width: 10
# #| fig-height: 12

# d %>% filter(trial==1) %>%
#   plot_dots()

# d %>% filter(trial==1) %>%
#   plot_dots2()

# d %>% filter(trial<2) %>%
#   plot_dotsAll()

```



<!-- ### Compare to original prototypes -->
```{r}
#| fig-width: 6
#| fig-height: 9


# d %>% filter(file %in% unique(d$file[1])) %>%
#   plot_dotsAll()

# plot_dotsAll_orig <- function(df) {
#   plots <- list()

#   for (i in 1:nrow(df)) {
#     p1 <- df[i, ] %>%
#       pivot_longer(cols = starts_with("x"), names_to = "dot", values_to = "x") %>%
#       mutate(dot = as.numeric(str_remove(dot, "x"))) %>%
#       pivot_longer(cols = starts_with("y"), names_to = "dot2", values_to = "y") %>%
#       mutate(dot2 = as.numeric(str_remove(dot2, "y"))) %>%
#       filter(dot == dot2) %>%
#       ggplot(aes(x = x, y = y)) +
#       geom_point() +
#       coord_cartesian(xlim = c(-25, 25), ylim = c(-25, 25)) +
#       theme_minimal() +
#       labs(title = df$id[i]) + theme_blank

#     plots <- append(plots, list(p1))
#   }

#   patchwork::wrap_plots(plots, ncol = 1)
# }

# mc24_proto |> filter(file %in% unique(d$file[1])) %>% plot_dotsAll_orig()


```




© 2024 Thomas Gorman

site created with R and quarto

  • View source
  • Edit this page
  • Report an issue